
************************************************************************************************
* Constructing the data set for "Public Health Policy At Scale.."
************************************************************************************************

************************************************************************************************
* Full population born 1970-2014 from birth registries
*************************************************************************************************

* Project paths (Windows).
*   rawStata   : folder holding the raw register files merged in below
*                (dodsaars2001.dta, dodsaasg2017.dta)
*   temp2      : folder holding mortality.dta and receiving mortality2.dta
*   sundformat : presumably the folder with the health ("Sundhed") format
*                files - not used in this part of the script
global rawStata "X:\Data\Workdata\707116\Stata"
global temp2 "X:\Data\Workdata\707116\ChildVax\02_temp\Nov2019"
* Fixed: the drive letter was not followed by a backslash ("D:Data\..."),
* which Windows resolves relative to the current directory on drive D:
* rather than from the root of the drive.
global sundformat "D:\Data\formater\Stata\formats\Sundhed"

* Session settings.
* NOTE: -set mem- is obsolete from Stata 12 onwards (memory is managed
* automatically); it is kept so the script still runs on older versions.
set mem 2g
* Do not pause output at --more-- prompts and allow wide log lines.
set more off
set linesize 255

*********************************************************************************************
* mortality includes deaths in the population born 1970-2014 from birth registries
*********************************************************************************************
* Dodsaars2001.dta includes all causes and manner/type of death registered 1970-2001. C_DOD1 is the primary cause of death
* dodsaasg2017.dta includes all causes of death registered from 1970-2017. C_DODTILGRUNDL_ACME is the primary cause of death
* DODSAASG2016.dta includes manner/type of death registered 2002-2016
*********************************************************************************************

* ---------------------------------------------------------------------------
* Part 1: deaths dated before 2002, merged with dodsaars2001.dta
* ---------------------------------------------------------------------------
clear all

use $temp2\mortality.dta, clear
generate dyear = year(deathdate)

* mortality.dta must be unique on pnr (1:m); the register file may hold
* several rows per pnr.
merge 1:m pnr using $rawStata\dodsaars2001.dta

* Where deathdate gave no year, take the year from the register date.
replace dyear = year(D_DODSDTO) if dyear == .
tabulate dyear _merge

* Remove rows that exist only in the register file and rows whose year of
* death is 2002 or later. Rows with a still-missing dyear are removed too,
* because a missing value compares greater than any number in Stata.
drop if _merge == 2 | dyear >= 2002
drop _merge

* i = running row number within pnr (ordered by C_DOD1), j = rows per pnr.
bysort pnr (C_DOD1): generate i = _n
by pnr: generate j = _N

* Inspect persons with more than one row.
tabulate i j, missing
list if j > 1

* For persons with several rows, discard the rows coded "0000"; afterwards
* every pnr must be left with exactly one row, otherwise the script stops.
drop if j > 1 & C_DOD1 == "0000"
by pnr: generate k = _N
assert k == 1
drop i j k

tabulate dyear C_DODSMAADE, missing

* --- Manner of death -------------------------------------------------------
* nat_death = 1 when C_DODSMAADE is "1",
*           = . when C_DODSMAADE is "-" or empty,
*           = 0 for every other code.
* Per the original note, the 0 group is mainly accident, suicide and homicide.
generate nat_death = (C_DODSMAADE == "1") if !inlist(C_DODSMAADE, "-", "")

tabulate C_DODSMAADE nat_death, missing
***cause of death***



/********* coding groups of the five most frequent causes of death for Infant and child mortality *****/

/* inf: infectious and parasitic disease */
/* neo: Neoplasms */
/* nerv: Diseases of the nervous system and sense organs */
/* resp: Diseases of the respiratory system (incl. pneumonia) */  
/* pre_nat: Certain conditions originating in the perinatal period */
/* cog_mal: Congenital malformation */
/* sids: Sudden infant death syndrome */
/* unknown: Unknown or unspecific causes */

/* Top 5 for <1 yr is: 1)pre_nat, 2)cog_mal, 3)unknown, 4)sids, 5) nerv, */
/* Top 5 for 1-4 yr is: 1) cog_mal, 2) neo, 3) nerv, 4)inf, 5) resp */


* dc: create a 0/1 indicator dc_<name> for one cause-of-death group, based on
* the code in C_DOD1 and the year of death in dyear.
*   argument 1    : suffix of the new variable (dc_<name>)
*   arguments 2-3 : lower/upper bound used when dyear <  1994
*   arguments 4-5 : lower/upper bound used when dyear >= 1994
* Bounds are inclusive and compared as strings. The call sites pass
* ICD-8-style numeric codes for the first pair and ICD-10-style codes for the
* second pair.
* Note: a missing dyear satisfies dyear >= 1994, since missing compares
* greater than any number in Stata.
capture program drop dc

program define dc
    args name old_lo old_hi new_lo new_hi
    generate dc_`name' =                                                  ///
        (C_DOD1 >= "`old_lo'" & C_DOD1 <= "`old_hi'" & dyear <  1994) |   ///
        (C_DOD1 >= "`new_lo'" & C_DOD1 <= "`new_hi'" & dyear >= 1994)
end

* Build one indicator per cause group. Each entry is:
*   group name, lower/upper bound for dyear < 1994, lower/upper bound for
*   dyear >= 1994
* The indicators are created in the order listed.
* NOTE(review): C_DOD1 == "0000" falls inside the "inf" range, yet "0000" rows
* are discarded earlier when a person has several register rows - confirm
* that "0000" is a genuine code and not a placeholder.
foreach spec in                                  ///
    "inf     0000 1399 A00  B999"                ///
    "neo     1400 2389 C00  C999"                ///
    "nerv    3200 3899 G00  H959"                ///
    "resp    4600 5199 J00  J999"                ///
    "pre_nat 7600 7799 P00  P969"                ///
    "cog_mal 7400 7599 Q00  Q999"                ///
    "sids    7950 7959 R950 R959"                /// Danish ICD-8 version: SIDS is 795.0 and 795.1
    "unknown 7960 7969 R960 R999" {
    dc `spec'
}

* Cause of death not recorded at all.
generate dc_mis = (C_DOD1 == "")

* dc_oth = 1 when none of the group indicators (including dc_mis) is set.
egen flag = rowtotal(dc_inf dc_neo dc_nerv dc_resp dc_pre_nat dc_cog_mal dc_sids dc_unknown dc_mis)
generate dc_oth = (flag == 0)
summarize dc_*

* Keep the analysis variables (flag is dropped here) and park the result in a
* temporary file so it can be appended to the second part of the script.
tempfile t1
keep pnr deathdate dead nat_death dyear dc_*
save `t1'

* ---------------------------------------------------------------------------
* Part 2: deaths dated after 2001, merged with dodsaasg2017.dta
* ---------------------------------------------------------------------------
use $temp2\mortality.dta, clear
generate dyear = year(deathdate)

* Earlier version of the merge, kept for reference:
*merge 1:m pnr using $rawStata\DODSAASG2016.dta
merge 1:m pnr using $rawStata\dodsaasg2017.dta
tabulate dyear _merge

* Remove rows that exist only in the register file and rows with a year of
* death of 2001 or earlier.
* NOTE(review): rows with missing dyear are NOT removed here (missing compares
* greater than any number in Stata), and dyear is not backfilled from the
* register as it is in the pre-2002 part - confirm this is intended.
drop if _merge == 2 | dyear <= 2001
tabulate _merge
drop _merge

* i = running row number within pnr, j = rows per pnr.
* NOTE(review): persons with several rows are only listed; nothing below
* removes them or asserts one row per pnr.
bysort pnr: generate i = _n
by pnr: generate j = _N

tabulate i j, missing
list if j > 1
drop i j

* --- Manner of death -------------------------------------------------------
* nat_death = 1 when C_DODSMAADE is "5",
*           = . when C_DODSMAADE is empty or "4",
*           = 0 for every other code.
generate nat_death = (C_DODSMAADE == "5") if !inlist(C_DODSMAADE, "", "4")


/********* coding groups of the five most frequent causes of death for Infant and child mortality *****/

/* inf: infectious and parasitic disease */
/* neo: Neoplasms */
/* nerv: Diseases of the nervous system and sense organs */
/* resp: Diseases of the respiratory system (incl. pneumonia) */  
/* pre_nat: Certain conditions originating in the perinatal period */
/* cog_mal: Congenital malformation */
/* sids: Sudden infant death syndrome */
/* unknown: Unknown or unspecific causes */

/* Top 5 for <1 yr is: 1)pre_nat, 2)cog_mal, 3)unknown, 4)sids, 5) nerv, */
/* Top 5 for 1-4 yr is: 1) cog_mal, 2) neo, 3) nerv, 4)inf, 5) resp */


* dc: create a 0/1 indicator dc_<name> for one cause-of-death group, based on
* the code in C_DODTILGRUNDL_ACME and the year of death in dyear.
* This replaces any earlier definition of dc.
*   argument 1    : suffix of the new variable (dc_<name>)
*   arguments 2-3 : lower/upper bound used when dyear <  1994
*   arguments 4-5 : lower/upper bound used when dyear >= 1994
* Bounds are inclusive and compared as strings. The call sites pass
* ICD-8-style numeric codes for the first pair and ICD-10-style codes for the
* second pair.
* Note: a missing dyear satisfies dyear >= 1994, since missing compares
* greater than any number in Stata.
capture program drop dc

program define dc
    args name old_lo old_hi new_lo new_hi
    generate dc_`name' =                                                        ///
        (C_DODTILGRUNDL_ACME >= "`old_lo'" & C_DODTILGRUNDL_ACME <= "`old_hi'"  ///
            & dyear <  1994) |                                                  ///
        (C_DODTILGRUNDL_ACME >= "`new_lo'" & C_DODTILGRUNDL_ACME <= "`new_hi'"  ///
            & dyear >= 1994)
end

* Build one indicator per cause group. Each entry is:
*   group name, lower/upper bound for dyear < 1994, lower/upper bound for
*   dyear >= 1994
* The indicators are created in the order listed.
foreach spec in                                  ///
    "inf     0000 1399 A00  B999"                ///
    "neo     1400 2389 C00  C999"                ///
    "nerv    3200 3899 G00  H959"                ///
    "resp    4600 5199 J00  J999"                ///
    "pre_nat 7600 7799 P00  P969"                ///
    "cog_mal 7400 7599 Q00  Q999"                ///
    "sids    7950 7959 R950 R959"                /// Danish ICD-8 version: SIDS is 795.0 and 795.1
    "unknown 7960 7969 R960 R999" {
    dc `spec'
}

* Cause of death not recorded at all.
generate dc_mis = (C_DODTILGRUNDL_ACME == "")

* dc_oth = 1 when none of the group indicators (including dc_mis) is set.
egen flag = rowtotal(dc_inf dc_neo dc_nerv dc_resp dc_pre_nat dc_cog_mal dc_sids dc_unknown dc_mis)
generate dc_oth = (flag == 0)
summarize dc_*

* Keep the analysis variables (flag is dropped here), stack the rows saved
* earlier in tempfile t1 underneath, and write the combined file.
keep pnr deathdate dead nat_death dyear dc_*
append using `t1'

save $temp2\mortality2.dta, replace

